#! /usr/bin/python
#
# Stephen Poletto (spoletto)
# Peter Wilmot (pbwilmot)
# CSCI1580 - Web Search
# Spring 2011 - Brown University
#

from documentVector import *
import operator
import numpy
import math
import sys

# Script body: read the document vector representations named on the command
# line and write one line per document to 'test.dat', formatted as
#     0 <featureID + 1>:<count> <featureID + 1>:<count> ...
# with the leading label always 0 and features in ascending featureID order.
if len(sys.argv) != 2:
    print("")
    print("usage: createSVMTestFile <vecrep>")
    print("")
    # NOTE(review): sys.exit() with no argument exits with status 0 even
    # though this is a usage error; left unchanged so existing callers see
    # the same exit status -- consider sys.exit(1).
    sys.exit()

# Parse every input line into a DocumentVector keyed by docID. A later line
# with the same docID replaces the earlier one.
docIDToVector = {}
with open(sys.argv[1], "r") as vecrepFile:
    # Iterate the file lazily instead of loading it whole with readlines();
    # the 'with' block closes the file even if DocumentVector raises.
    for vecrep in vecrepFile:
        vecrep = vecrep.rstrip("\n")
        # The slice drops the last remaining character of the line --
        # presumably a trailing delimiter in the vecrep format; TODO confirm
        # against DocumentVector's parser.
        vector = DocumentVector(vecrep[:-1], 0)
        docIDToVector[vector.docID] = vector

# The output file is opened only after the input parsed successfully, so a
# failed parse no longer truncates an existing test.dat, and the 'with' block
# guarantees the file is flushed and closed.
with open('test.dat', 'wb') as outputFile:
    for docID in docIDToVector:
        vector = docIDToVector[docID]
        outputFile.write("0 ")

        featureIDCounts = vector.featureIDToNumOccurrences
        # Sort (featureID, count) pairs by featureID so indices are written
        # in ascending order.
        sorted_featureIDs = sorted(featureIDCounts.items(), key=operator.itemgetter(0))
        for featureID, count in sorted_featureIDs:
            # Written index is featureID + 1 -- presumably the IDs are
            # 0-based and the consumer expects indices starting at 1; verify.
            outputFile.write(str(featureID + 1) + ":" + str(count) + " ")
        outputFile.write("\n")